import numpy as np
import h5py
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
# File stems of every recording, organised as
# person -> activity -> sensor placement.
# Each recording is stored at data/<activity>/<stem>_data.csv.
_recording_stems = {
    "kyle": {
        "jumping": {"hand": "kjh", "front": "kjf", "back": "kjb"},
        "walking": {"hand": "kwh", "front": "kwf", "back": "kwb"},
    },
    "abdellah": {
        "jumping": {"backright": "ajbrp", "frontleft": "ajflp"},
        "walking": {"backright": "awbrp", "frontleft": "awflp"},
    },
    "liam": {
        "jumping": {"backleft": "ljlbp", "backright": "ljbrp"},
        "walking": {
            "lefthand": "lwlh",
            "leftpocket": "lwlp",
            "righthand": "lwrh",
            "rightpocket": "lwrp",
        },
    },
}

# Expand the stems into the CSV paths used by the rest of the script
# (same nesting and key order as the stem table above).
data_files = {
    person: {
        activity: {
            area: f"data/{activity}/{stem}_data.csv"
            for area, stem in placements.items()
        }
        for activity, placements in activities.items()
    }
    for person, activities in _recording_stems.items()
}
# Copy every CSV recording into a single HDF5 file, one dataset per
# "<person>/<activity>/<area>" path. The generator is lazy, so the CSVs
# are still read one at a time while the HDF5 file is open.
recordings = (
    (f"{person}/{activity}/{area}", csv_path)
    for person, activities in data_files.items()
    for activity, areas in activities.items()
    for area, csv_path in areas.items()
)
with h5py.File("activityData.hdf5", "w") as hdf5_file:
    for dataset_path, csv_path in recordings:
        samples = pd.read_csv(csv_path).to_numpy()
        hdf5_file.create_dataset(dataset_path, data=samples)
# Column names applied to every stored recording, in storage order.
features = [
    "Time (s)",
    "Linear Acceleration x (m/s^2)",
    "Linear Acceleration y (m/s^2)",
    "Linear Acceleration z (m/s^2)",
    "Absolute acceleration (m/s^2)",
]


def preprocess_data(hdf5_file, trim_seconds=5):
    """Build one combined DataFrame per activity across all persons.

    Args:
        hdf5_file: Open ``h5py.File`` (or any nested mapping) laid out as
            person -> activity -> area -> 2-D array whose columns follow
            ``features``.
        trim_seconds: Amount of time cut from the start and from the end
            of every recording, in the units of the "Time (s)" column.
            Samples exactly on the cut boundaries are dropped as well.

    Returns:
        dict mapping activity name to a DataFrame (columns ``features``,
        fresh 0..n-1 index) holding every trimmed recording of that
        activity. "walking" and "jumping" are always present; an activity
        with no recordings maps to an empty DataFrame instead of making
        ``pd.concat`` raise ``ValueError``.
    """
    trimmed = {"walking": [], "jumping": []}
    for person in hdf5_file:
        activities = hdf5_file[person]
        for activity in activities:
            areas = activities[activity]
            for area in areas:
                df = pd.DataFrame(areas[area], columns=features)
                times = df["Time (s)"]
                # Strict comparisons: keep only samples strictly inside
                # the trimmed window.
                keep = (times > times.min() + trim_seconds) & (
                    times < times.max() - trim_seconds
                )
                # setdefault: tolerate activities beyond walking/jumping
                # instead of failing with KeyError.
                trimmed.setdefault(activity, []).append(df[keep])

    # A flat concat with ignore_index=True yields the same rows, order and
    # index as concatenating per person first and then across persons.
    return {
        activity: (
            pd.concat(frames, ignore_index=True)
            if frames
            else pd.DataFrame(columns=features, dtype=float)
        )
        for activity, frames in trimmed.items()
    }
# Column names applied to every stored recording, in storage order.
# Same list as the earlier `features` declaration in this file; kept here
# so this section remains runnable on its own.
features = [
    "Time (s)",
    "Linear Acceleration x (m/s^2)",
    "Linear Acceleration y (m/s^2)",
    "Linear Acceleration z (m/s^2)",
    "Absolute acceleration (m/s^2)",
]


def preprocessed_data(hdf5_file, trim_seconds=5):
    """Build one combined DataFrame per person and activity.

    Args:
        hdf5_file: Open ``h5py.File`` (or any nested mapping) laid out as
            person -> activity -> area -> 2-D array whose columns follow
            ``features``.
        trim_seconds: Amount of time cut from the start and from the end
            of every recording, in the units of the "Time (s)" column.
            Samples exactly on the cut boundaries are dropped as well.

    Returns:
        Nested dict ``result[person][activity]`` -> DataFrame (columns
        ``features``, fresh 0..n-1 index) with all of that person's
        trimmed recordings for the activity, in storage order. An activity
        group without recordings yields an empty DataFrame instead of
        making ``pd.concat`` raise ``ValueError``.
    """
    data_frames = {}
    for person in hdf5_file:
        activities = hdf5_file[person]
        data_frames[person] = {}
        for activity in activities:
            areas = activities[activity]
            trimmed = []
            for area in areas:
                df = pd.DataFrame(areas[area], columns=features)
                times = df["Time (s)"]
                # Strict comparisons: keep only samples strictly inside
                # the trimmed window.
                keep = (times > times.min() + trim_seconds) & (
                    times < times.max() - trim_seconds
                )
                trimmed.append(df[keep])
            if trimmed:
                combined = pd.concat(trimmed, ignore_index=True)
            else:
                combined = pd.DataFrame(columns=features, dtype=float)
            data_frames[person][activity] = combined
    return data_frames
# Build the per-person view (person -> activity -> DataFrame) from the
# stored recordings.
with h5py.File("activityData.hdf5", mode="r") as activity_store:
    preprocessed_data1 = preprocessed_data(activity_store)
def plot_acceleration_data1(preprocessed_data, person, activity):
    """Show x/y/z linear acceleration over time for one person and activity.

    Args:
        preprocessed_data: Nested mapping indexed as
            ``preprocessed_data[person][activity]`` that yields a DataFrame
            with "Time (s)" and "Linear Acceleration {x,y,z} (m/s^2)"
            columns.
        person: Key of the person to plot; capitalised in the titles.
        activity: Key of the activity to plot; capitalised in the titles.

    Draws three stacked subplots sharing the time axis and displays the
    figure with ``plt.show()``. Returns nothing.
    """
    df = preprocessed_data[person][activity]
    heading = f"{activity.capitalize()} Activity for {person.capitalize()}"

    fig, axes = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
    # One subplot per acceleration component, top to bottom.
    for ax, component, color in zip(axes, ("x", "y", "z"), ("red", "blue", "green")):
        ax.plot(
            df["Time (s)"],
            df[f"Linear Acceleration {component} (m/s^2)"],
            label=f"{component}-axis",
            color=color,
        )
        ax.set_title(f"{heading} ({component.upper()}-axis)")
        ax.set_ylabel("Acceleration (m/s^2)")
        ax.legend()
    # The x axis is shared, so only the bottom subplot is labelled.
    axes[2].set_xlabel("Time (s)")

    plt.tight_layout()
    plt.show()
# One figure per person and activity: walking first, then jumping.
for person_name in ("kyle", "abdellah", "liam"):
    for activity_name in ("walking", "jumping"):
        plot_acceleration_data1(preprocessed_data1, person_name, activity_name)
# Read the HDF5 file and build one combined DataFrame per activity.
# The result is bound to `activity_frames`, not `preprocessed_data`: that
# name belongs to the per-person preprocessing function defined earlier in
# this file, and rebinding it to a dict makes every later call to the
# function fail with "TypeError: 'dict' object is not callable".
with h5py.File("activityData.hdf5", "r") as hdf5_file:
    activity_frames = preprocess_data(hdf5_file)

# Access the walking and jumping DataFrames
walking_data = activity_frames["walking"]
jumping_data = activity_frames["jumping"]
jumping_data  # bare expression: displays the frame when run as a notebook cell
| | Time (s) | Linear Acceleration x (m/s^2) | Linear Acceleration y (m/s^2) | Linear Acceleration z (m/s^2) | Absolute acceleration (m/s^2) |
|---|---|---|---|---|---|
| 0 | 5.013552 | -2.933793 | 11.037301 | 3.719913 | 12.011116 |
| 1 | 5.023593 | -2.416790 | 11.771116 | 3.588315 | 12.540975 |
| 2 | 5.033633 | -2.126614 | 12.072641 | 2.344312 | 12.480663 |
| 3 | 5.043674 | -1.933966 | 11.783559 | 0.807484 | 11.968480 |
| 4 | 5.053715 | -1.944011 | 11.346681 | 0.003355 | 11.512009 |
| ... | ... | ... | ... | ... | ... |
| 47677 | 97.299237 | 2.557272 | -13.176605 | 5.568800 | 14.531830 |
| 47678 | 97.309172 | 2.165217 | -12.374189 | 4.263774 | 13.266065 |
| 47679 | 97.319107 | 1.679658 | -11.476837 | 3.132702 | 12.014693 |
| 47680 | 97.329042 | 1.012671 | -10.766406 | 2.379240 | 11.072569 |
| 47681 | 97.338976 | 0.386872 | -10.057555 | 2.027284 | 10.267130 |
47682 rows × 5 columns
import seaborn as sns
import matplotlib.pyplot as plt
def _show_pair_plot(df, title):
    """Display a seaborn pair plot of every column pair in ``df``."""
    sns.pairplot(df)
    plt.suptitle(f'{title} Pair Plot', y=1.02, fontsize=14)
    plt.show()


def _show_correlation_heatmap(df, title):
    """Display an annotated heatmap of ``df.corr()`` on a fixed -1..1 scale."""
    correlations = df.corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(f'{title} Correlation Heatmap', fontsize=14)
    plt.show()


def visualize_data(df, title):
    """Show a pair plot and then a correlation heatmap for ``df``.

    Args:
        df: DataFrame whose columns are plotted against each other and
            correlated with ``DataFrame.corr()``.
        title: Prefix for both figure titles
            ("<title> Pair Plot", "<title> Correlation Heatmap").

    Each figure is displayed with its own ``plt.show()``. Returns nothing.
    """
    _show_pair_plot(df, title)
    _show_correlation_heatmap(df, title)
# Visualize the combined walking data first, then the jumping data.
for activity_frame, activity_title in (
    (walking_data, "Walking"),
    (jumping_data, "Jumping"),
):
    visualize_data(activity_frame, activity_title)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_12172\3055048744.py in <module>
     21 visualize_data(jumping_data, "Jumping")
     22
---> 23 print(walking_data.corr.to_string())

AttributeError: 'function' object has no attribute 'to_string'
# Pairwise correlation matrix of the combined walking recordings.
walking_correlations = walking_data.corr()
walking_correlations  # bare expression: displays the matrix in a notebook cell
| | Time (s) | Linear Acceleration x (m/s^2) | Linear Acceleration y (m/s^2) | Linear Acceleration z (m/s^2) | Absolute acceleration (m/s^2) |
|---|---|---|---|---|---|
| Time (s) | 1.000000 | -0.073506 | -0.453393 | -0.122208 | 0.306061 |
| Linear Acceleration x (m/s^2) | -0.073506 | 1.000000 | -0.009823 | 0.247693 | 0.262401 |
| Linear Acceleration y (m/s^2) | -0.453393 | -0.009823 | 1.000000 | 0.209656 | -0.423656 |
| Linear Acceleration z (m/s^2) | -0.122208 | 0.247693 | 0.209656 | 1.000000 | -0.029331 |
| Absolute acceleration (m/s^2) | 0.306061 | 0.262401 | -0.423656 | -0.029331 | 1.000000 |
import matplotlib.ticker as ticker
def plot_acceleration_data2(df, person, activity):
    """Show x/y/z linear acceleration over time for a single recording.

    Args:
        df: DataFrame with "Time (s)" and
            "Linear Acceleration {x,y,z} (m/s^2)" columns.
        person: Label used in the subplot titles; capitalised.
        activity: Activity name used in the subplot titles; capitalised.

    Draws three stacked subplots sharing the time axis, each with an
    automatic major-tick locator on its x axis, and displays the figure
    with ``plt.show()``. Returns nothing.
    """
    heading = f"{activity.capitalize()} Activity for {person.capitalize()}"
    components = (("x", "red"), ("y", "blue"), ("z", "green"))

    fig, axes = plt.subplots(3, 1, figsize=(12, 12), sharex=True)
    for ax, (component, color) in zip(axes, components):
        ax.plot(
            df["Time (s)"],
            df[f"Linear Acceleration {component} (m/s^2)"],
            label=f"{component}-axis",
            color=color,
        )
        ax.set_title(f"{heading} ({component.upper()}-axis)")
        ax.set_ylabel("Acceleration (m/s^2)")
        ax.xaxis.set_major_locator(ticker.AutoLocator())
        ax.legend()
    # The x axis is shared, so only the bottom subplot is labelled.
    axes[2].set_xlabel("Time (s)")

    plt.tight_layout()
    plt.show()
import pandas as pd
# Plot the jumping recordings straight from their CSV files (no trimming).
# Maps each recording's file stem to the person who recorded it; the
# iteration order matches the original list of stems.
jumping_recordings = {
    "ajbrp": "Abdellah",
    "ajflp": "Abdellah",
    "kjb": "Kyle",
    "kjf": "Kyle",
    "ljbrp": "Liam",
    "ljlbp": "Liam",
}
for name, person_name in jumping_recordings.items():
    csv_file_path = f"data/jumping/{name}_data.csv"
    csv_data = pd.read_csv(csv_file_path)
    # Title the figure with the person's name (previously computed but never
    # used) and keep the file stem so that the two recordings of the same
    # person remain distinguishable.
    plot_acceleration_data2(csv_data, f"{person_name} ({name})", "jumping")